using Pkg Pkg.activate(".") using DataFrames, CSV, DataFramesMeta, StatsPlots, Downloads, ProgressMeter, Weave, Loess, Arrow, Transducers, Memoization, ReverseDiff, Zygote, Turing, LinearAlgebra, ColorSchemes, Serialization
In this project, we will download some files from the CA dept of Education which give averaged results for test scores in all schools in CA across various student groups within the schoool.
The data is available at: https://caaspp-elpac.cde.ca.gov/caaspp/ResearchFileListSB?ps=true&lstTestYear=2021&lstTestType=B&lstCounty=00&lstDistrict=00000#dl
We are going to get the Los Angeles county files which avoids unnecessary data.
Unfortunately some years have different "versions" and there is no way on the site to get a list of files, so we need to just by hand click through and find out the file names, for example:
https://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/sbca2015all19csv_v3.zip for 2014-2015 LA County
https://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/sbca2021all19csv_v2.zip for 2020-2021 LA County data...
there is no 2019-2020 data as COVID prevented the testing from happening.
versions = [2015 => 3, 2016 => 3, 2017 => 2, 2018 => 3, 2019 => 4 , 2021 => 2] files = ["sb_ca$(vv[1])_all_19_csv_v$(vv[2]).zip" for vv in versions]
6-element Vector{String}:
"sb_ca2015_all_19_csv_v3.zip"
"sb_ca2016_all_19_csv_v3.zip"
"sb_ca2017_all_19_csv_v2.zip"
"sb_ca2018_all_19_csv_v3.zip"
"sb_ca2019_all_19_csv_v4.zip"
"sb_ca2021_all_19_csv_v2.zip"
We also need the entity file that describes the codes for each school/district etc and the codes for the tests and student group descriptions we'll assume for our analysis that none of the entities of interest changed during the time period and just get the recent ones.
push!(files,"sb_ca2021entities_csv.zip") push!(files,"Tests.zip") push!(files,"StudentGroups.zip")
9-element Vector{String}:
"sb_ca2015_all_19_csv_v3.zip"
"sb_ca2016_all_19_csv_v3.zip"
"sb_ca2017_all_19_csv_v2.zip"
"sb_ca2018_all_19_csv_v3.zip"
"sb_ca2019_all_19_csv_v4.zip"
"sb_ca2021_all_19_csv_v2.zip"
"sb_ca2021entities_csv.zip"
"Tests.zip"
"StudentGroups.zip"
We'll change directory into the data dir, download each of the files, unzip them, and then cd out of the directory.
We use try/finally to ensure that we wind up in the same directory even if there's an error, then read the directory to see what files we have...
try cd("data") @showprogress 3 for f in files if ! ispath(f) # if we don't have the file already Downloads.download("https://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/$(f)",f) end run(`unzip -u $(f)`) end finally cd("..") end readdir("data")
Archive: sb_ca2015_all_19_csv_v3.zip
Archive: sb_ca2016_all_19_csv_v3.zip
Archive: sb_ca2017_all_19_csv_v2.zip
Archive: sb_ca2018_all_19_csv_v3.zip
Archive: sb_ca2019_all_19_csv_v4.zip
Archive: sb_ca2021_all_19_csv_v2.zip
Archive: sb_ca2021entities_csv.zip
Archive: Tests.zip
Archive: StudentGroups.zip
19-element Vector{String}:
"StudentGroups.txt"
"StudentGroups.zip"
"Tests.txt"
"Tests.zip"
"sb_ca2015_all_19_csv_v3.txt"
"sb_ca2015_all_19_csv_v3.zip"
"sb_ca2015_all_ascii_v3.zip"
"sb_ca2016_all_19_csv_v3.txt"
"sb_ca2016_all_19_csv_v3.zip"
"sb_ca2017_all_19_csv_v2.txt"
"sb_ca2017_all_19_csv_v2.zip"
"sb_ca2018_all_19_csv_v3.txt"
"sb_ca2018_all_19_csv_v3.zip"
"sb_ca2019_all_19_csv_v4.txt"
"sb_ca2019_all_19_csv_v4.zip"
"sb_ca2021_all_19_csv_v2.txt"
"sb_ca2021_all_19_csv_v2.zip"
"sb_ca2021entities_csv.txt"
"sb_ca2021entities_csv.zip"
Next, we need to read the data in, and try to subset down to the schools of interest. Unfortunately, the files don't all have the same columns, nor do they have the same delimeters! What do we do about that? DataFrames allows us to append!(df,df2) and handle the case where they don't have exactly the same columns by various methods by specifying the "cols" option (see docs) cols=:union causes the resulting dataset to have all the columns from both files, but with missing values where needed
testscores = DataFrame() entities = DataFrame() tests = DataFrame() students = DataFrame() const cachedir = "/var/cache/userdata/dlakelan/juliacache" if ispath(joinpath(cachedir,"tests.arrow")) global tests = DataFrame(Arrow.Table(joinpath(cachedir,"tests.arrow"))) global students = DataFrame(Arrow.Table(joinpath(cachedir,"students.arrow"))) global entities = DataFrame(Arrow.Table(joinpath(cachedir,"entities.arrow"))) # the dataframe for test scores is huge, 3.8 million rows and a lot of columns, we only care about these districts. # we run the rows of the Arrow.Table through a Filter transducer that filters on District_Code and collect the ones we care about, saving gigabytes # of RAM ourentities = @subset(entities,in.(:District_Name, Ref(["Pasadena Unified","Glendale Unified", "Alhambra Unified"]))) @show ourentities let ourdistcodes = unique(ourentities.District_Code); global testscores = Tables.rows(Arrow.Table(joinpath(cachedir,"testscores.arrow"))) |> Filter(x -> !ismissing(x.District_Code) && x.District_Code in ourdistcodes && x.School_Code .!= 0) |> DataFrame end else for f in filter(endswith(".txt"),readdir("data")) @show f if occursin("entities",f) global entities = CSV.read(joinpath("data",f),DataFrame; normalizenames=true, delim="^") elseif occursin("Student",f) global students = CSV.read(joinpath("data",f),DataFrame; normalizenames=true,delim="^") elseif occursin("Tests",f) global tests = CSV.read(joinpath("data",f),DataFrame; normalizenames=true,delim="^") else df = CSV.read(joinpath("data",f),DataFrame; normalizenames=true) # uses a standard "," delimiter append!(testscores,df; cols=:union) # we will collect all the various columns with missing data where they don't exist end end Arrow.write(joinpath(cachedir,"tests.arrow"),tests) Arrow.write(joinpath(cachedir,"students.arrow"),students) Arrow.write(joinpath(cachedir,"entities.arrow"),entities) Arrow.write(joinpath(cachedir,"testscores.arrow"),testscores) end
ourentities = 78×10 DataFrame
Row │ County_Code District_Code School_Code Filler Test_Year Type_ID
County_Name District_Name School_Name Zip_Cod
e
│ Int64 Int64 Int64 Missing Int64 Int64
String String? String? Int64?
─────┼─────────────────────────────────────────────────────────────────────
───────────────────────────────────────────────────────────────────────────
──
1 │ 19 64568 0 missing 2021 6
Los Angeles Glendale Unified missing missin
g
2 │ 19 64568 1932144 missing 2021 7
Los Angeles Glendale Unified Crescenta Valley High 9121
4
3 │ 19 64568 1933472 missing 2021 7
Los Angeles Glendale Unified Daily (Allan F.) High (Continuat… 9120
6
4 │ 19 64568 1933530 missing 2021 7
Los Angeles Glendale Unified Glendale High 9120
5
5 │ 19 64568 1934082 missing 2021 7
Los Angeles Glendale Unified Herbert Hoover High 9120
2
6 │ 19 64568 1964568 missing 2021 7
Los Angeles Glendale Unified Glendale Unified District Level … 9120
6
7 │ 19 64568 1995497 missing 2021 7
Los Angeles Glendale Unified Verdugo Academy 9121
4
8 │ 19 64568 1996131 missing 2021 7
Los Angeles Glendale Unified Anderson W. Clark Magnet High 9121
4
9 │ 19 64568 1996156 missing 2021 7
Los Angeles Glendale Unified Jewel City Community Day 9120
4
10 │ 19 64568 6013619 missing 2021 7
Los Angeles Glendale Unified Balboa Elementary 9120
1
11 │ 19 64568 6013627 missing 2021 7
Los Angeles Glendale Unified Cerritos Elementary 9120
5
12 │ 19 64568 6013635 missing 2021 7
Los Angeles Glendale Unified Columbus Elementary 9120
3
13 │ 19 64568 6013643 missing 2021 7
Los Angeles Glendale Unified Dunsmore Elementary 9121
4
14 │ 19 64568 6013650 missing 2021 7
Los Angeles Glendale Unified Thomas Edison Elementary 9120
4
15 │ 19 64568 6013676 missing 2021 7
Los Angeles Glendale Unified Benjamin Franklin Elementary 9120
1
16 │ 19 64568 6013684 missing 2021 7
Los Angeles Glendale Unified John C. Fremont Elementary 9120
8
17 │ 19 64568 6013692 missing 2021 7
Los Angeles Glendale Unified Glenoaks Elementary 9120
6
18 │ 19 64568 6013718 missing 2021 7
Los Angeles Glendale Unified Thomas Jefferson Elementary 9120
1
19 │ 19 64568 6013726 missing 2021 7
Los Angeles Glendale Unified Mark Keppel Elementary 9120
2
20 │ 19 64568 6013734 missing 2021 7
Los Angeles Glendale Unified La Crescenta Elementary 9121
4
21 │ 19 64568 6013742 missing 2021 7
Los Angeles Glendale Unified Abraham Lincoln Elementary 9121
4
22 │ 19 64568 6013767 missing 2021 7
Los Angeles Glendale Unified Horace Mann Elementary 9120
5
23 │ 19 64568 6013775 missing 2021 7
Los Angeles Glendale Unified John Marshall Elementary 9120
5
24 │ 19 64568 6013783 missing 2021 7
Los Angeles Glendale Unified Monte Vista Elementary 9121
4
25 │ 19 64568 6013809 missing 2021 7
Los Angeles Glendale Unified Mountain Avenue Elementary 9121
4
26 │ 19 64568 6013817 missing 2021 7
Los Angeles Glendale Unified John Muir Elementary 9120
5
27 │ 19 64568 6013825 missing 2021 7
Los Angeles Glendale Unified Valley View Elementary 9121
4
28 │ 19 64568 6013833 missing 2021 7
Los Angeles Glendale Unified Verdugo Woodlands Elementary 9120
8
29 │ 19 64568 6013841 missing 2021 7
Los Angeles Glendale Unified R. D. White Elementary 9120
6
30 │ 19 64568 6057715 missing 2021 7
Los Angeles Glendale Unified Theodore Roosevelt Middle 9120
6
31 │ 19 64568 6057723 missing 2021 7
Los Angeles Glendale Unified Eleanor J. Toll Middle 9120
2
32 │ 19 64568 6061303 missing 2021 7
Los Angeles Glendale Unified Rosemont Middle 9121
4
33 │ 19 64568 6061311 missing 2021 7
Los Angeles Glendale Unified Woodrow Wilson Middle 9120
6
34 │ 19 64568 6099022 missing 2021 7
Los Angeles Glendale Unified College View 9120
7
35 │ 19 64881 0 missing 2021 6
Los Angeles Pasadena Unified missing missin
g
36 │ 19 64881 117440 missing 2021 7
Los Angeles Pasadena Unified CIS Academy 9110
7
37 │ 19 64881 127746 missing 2021 7
Los Angeles Pasadena Unified Sierra Madre Middle 9102
4
38 │ 19 64881 1931062 missing 2021 7
Los Angeles Pasadena Unified Blair High 9110
6
39 │ 19 64881 1931674 missing 2021 7
Los Angeles Pasadena Unified Marshall Fundamental 9110
4
40 │ 19 64881 1932409 missing 2021 7
Los Angeles Pasadena Unified Norma Coombs Elementary 9110
7
41 │ 19 64881 1936103 missing 2021 7
Los Angeles Pasadena Unified John Muir High 9110
3
42 │ 19 64881 1936806 missing 2021 7
Los Angeles Pasadena Unified Rose City High (Continuation) 9110
1
43 │ 19 64881 1936822 missing 2021 7
Los Angeles Pasadena Unified Pasadena High 9110
7
44 │ 19 64881 1964881 missing 2021 7
Los Angeles Pasadena Unified Pasadena Unified District Level … 9110
1
45 │ 19 64881 6021505 missing 2021 7
Los Angeles Pasadena Unified Altadena Elementary 9100
1
46 │ 19 64881 6021554 missing 2021 7
Los Angeles Pasadena Unified Don Benito Fundamental 9110
7
47 │ 19 64881 6021570 missing 2021 7
Los Angeles Pasadena Unified Field (Eugene) Elementary 9110
7
48 │ 19 64881 6021612 missing 2021 7
Los Angeles Pasadena Unified Hamilton Elementary 9110
7
49 │ 19 64881 6021620 missing 2021 7
Los Angeles Pasadena Unified Jackson Elementary 9100
1
50 │ 19 64881 6021679 missing 2021 7
Los Angeles Pasadena Unified Longfellow (Henry W.) Elementary 9110
4
51 │ 19 64881 6021687 missing 2021 7
Los Angeles Pasadena Unified Madison Elementary 9110
4
52 │ 19 64881 6021729 missing 2021 7
Los Angeles Pasadena Unified San Rafael Elementary 9110
5
53 │ 19 64881 6021737 missing 2021 7
Los Angeles Pasadena Unified Sierra Madre Elementary 9102
4
54 │ 19 64881 6021752 missing 2021 7
Los Angeles Pasadena Unified Washington Middle 9110
3
55 │ 19 64881 6021760 missing 2021 7
Los Angeles Pasadena Unified Webster Elementary 9110
4
56 │ 19 64881 6021778 missing 2021 7
Los Angeles Pasadena Unified Willard Elementary 9110
7
57 │ 19 64881 6058465 missing 2021 7
Los Angeles Pasadena Unified Charles W. Eliot Middle 9100
1
58 │ 19 64881 6119549 missing 2021 7
Los Angeles Pasadena Unified Washington Elementary 9110
3
59 │ 19 64881 6120265 missing 2021 7
Los Angeles Pasadena Unified McKinley 9110
1
60 │ 19 75713 0 missing 2021 6
Los Angeles Alhambra Unified missing missin
g
61 │ 19 75713 1930163 missing 2021 7
Los Angeles Alhambra Unified Alhambra High 9180
1
62 │ 19 75713 1934553 missing 2021 7
Los Angeles Alhambra Unified Mark Keppel High 9180
1
63 │ 19 75713 1937697 missing 2021 7
Los Angeles Alhambra Unified San Gabriel High 9177
6
64 │ 19 75713 1975713 missing 2021 7
Los Angeles Alhambra Unified Alhambra Unified District Level … 9180
3
65 │ 19 75713 1995430 missing 2021 7
Los Angeles Alhambra Unified Independence High (Alternative) 9180
1
66 │ 19 75713 6011001 missing 2021 7
Los Angeles Alhambra Unified Brightwood Elementary 9175
4
67 │ 19 75713 6011019 missing 2021 7
Los Angeles Alhambra Unified Emery Park Elementary 9180
3
68 │ 19 75713 6011027 missing 2021 7
Los Angeles Alhambra Unified Fremont Elementary 9180
3
69 │ 19 75713 6011035 missing 2021 7
Los Angeles Alhambra Unified Garfield Elementary 9180
1
70 │ 19 75713 6011043 missing 2021 7
Los Angeles Alhambra Unified Granada Elementary 9180
1
71 │ 19 75713 6011050 missing 2021 7
Los Angeles Alhambra Unified Marguerita Elementary 9180
3
72 │ 19 75713 6011068 missing 2021 7
Los Angeles Alhambra Unified Martha Baldwin Elementary 9180
1
73 │ 19 75713 6011076 missing 2021 7
Los Angeles Alhambra Unified Monterey Highlands Elementary 9175
4
74 │ 19 75713 6011084 missing 2021 7
Los Angeles Alhambra Unified Park Elementary 9180
1
75 │ 19 75713 6011092 missing 2021 7
Los Angeles Alhambra Unified Ramona Elementary 9180
3
76 │ 19 75713 6011100 missing 2021 7
Los Angeles Alhambra Unified Repetto Elementary 9175
4
77 │ 19 75713 6011118 missing 2021 7
Los Angeles Alhambra Unified William Northrup Elementary 9180
1
78 │ 19 75713 6011126 missing 2021 7
Los Angeles Alhambra Unified Ynez Elementary 9175
4
| Row | County_Code | District_Code | School_Code | Filler | Test_Year | Subgroup_ID | Test_Type | Total_CAASPP_Enrollment | Total_Tested_At_Entity_Level | Total_Tested_at_Subgroup_Level | Grade | Test_Id | CAASPP_Reported_Enrollment | Students_Tested | Mean_Scale_Score | Percentage_Standard_Exceeded | Percentage_Standard_Met | Percentage_Standard_Met_and_Above | Percentage_Standard_Nearly_Met | Percentage_Standard_Not_Met | Students_with_Scores | Area_1_Percentage_Above_Standard | Area_1_Percentage_At_or_Near_Standard | Area_1_Percentage_Below_Standard | Area_2_Percentage_Above_Standard | Area_2_Percentage_At_or_Near_Standard | Area_2_Percentage_Below_Standard | Area_3_Percentage_Above_Standard | Area_3_Percentage_At_or_Near_Standard | Area_3_Percentage_Below_Standard | Area_4_Percentage_Above_Standard | Area_4_Percentage_At_or_Near_Standard | Area_4_Percentage_Below_Standard | Total_Tested_with_Scores | Area_1_Percentage_Near_Standard | Area_2_Percentage_Near_Standard | Area_3_Percentage_Near_Standard | Area_4_Percentage_Near_Standard | County_Code_District_Code_School_Code_Filler_Test_Year_Student_Group_ID_Test_Type_Total_Tested_at_Reporting_Level_Total_Tested_with_Scores_at_Reporting_Level_Grade_Test_ID_Students_Enrolled_Students_Tested_Mean_Scale_Score_Percentage_Standard_Exceeded_Percentage_Standard_Met_Percentage_Standard_Met_and_Above_Percentage_Standard_Nearly_Met_Percentage_Standard_Not_Met_Students_with_Scores_Area_1_Percentage_Above_Standard_Area_1_Percentage_Near_Standard_Area_1_Percentage_Below_Standard_Area_2_Percentage_Above_Standard_Area_2_Percentage_Near_Standard_Area_2_Percentage_Below_Standard_Area_3_Percentage_Above_Standard_Area_3_Percentage_Near_Standard_Area_3_Percentage_Below_Standard_Area_4_Percentage_Above_Standard_Area_4_Percentage_Near_Standard_Area_4_Percentage_Below_Standard_Type_ID |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Int64 | Int64 | Int64 | Missing | Int64 | Int64 | String | Int64? | Any | Int64? | Int64 | Int64 | Any | Any | String? | String | String | String | String | String | Any | String | String? | String | String | String? | String | String | String? | String | String | String? | String | Any | String? | String? | String? | String? | Missing | |
| 1 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 6 | 1 | 1 | 0 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 2 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 6 | 2 | 1 | 0 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 3 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 8 | 2 | 1 | 1 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 4 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 8 | 1 | 1 | 1 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 5 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 11 | 1 | 1 | 1 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 6 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 11 | 2 | 1 | 0 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 7 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 13 | 2 | 3 | 1 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 8 | 19 | 64568 | 127415 | missing | 2015 | 1 | B | 3 | 2 | 2 | 13 | 1 | 3 | 2 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 9 | 19 | 64568 | 127415 | missing | 2015 | 3 | B | 3 | 2 | 2 | 6 | 1 | 1 | 0 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 10 | 19 | 64568 | 127415 | missing | 2015 | 3 | B | 3 | 2 | 2 | 6 | 2 | 1 | 0 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 11 | 19 | 64568 | 127415 | missing | 2015 | 3 | B | 3 | 2 | 2 | 8 | 1 | 1 | 1 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 12 | 19 | 64568 | 127415 | missing | 2015 | 3 | B | 3 | 2 | 2 | 8 | 2 | 1 | 1 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 13 | 19 | 64568 | 127415 | missing | 2015 | 3 | B | 3 | 2 | 2 | 11 | 2 | 1 | 0 | * | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 133494 | 19 | 75713 | 6011126 | missing | 2019 | 190 | B | missing | * | missing | 13 | 2 | * | * | missing | * | * | * | * | * | * | * | missing | * | * | missing | * | * | missing | * | * | missing | * | * | * | * | * | * | missing |
| 133495 | 19 | 75713 | 6011126 | missing | 2019 | 202 | B | missing | 314 | missing | 13 | 2 | 314 | 314 | missing | 42.99 | 24.84 | 67.83 | 15.92 | 16.24 | 314 | 52.23 | missing | 19.43 | 39.49 | missing | 19.43 | 38.85 | missing | 15.29 | 0.00 | missing | 0.00 | 314 | 28.34 | 41.08 | 45.86 | 0.00 | missing |
| 133496 | 19 | 75713 | 6011126 | missing | 2019 | 203 | B | missing | 4 | missing | 13 | 2 | 4 | 4 | missing | * | * | * | * | * | 4 | * | missing | * | * | missing | * | * | missing | * | * | missing | * | 4 | * | * | * | * | missing |
| 133497 | 19 | 75713 | 6011126 | missing | 2019 | 204 | B | missing | 87 | missing | 13 | 2 | 87 | 87 | missing | 8.05 | 9.20 | 17.24 | 27.59 | 55.17 | 87 | 10.34 | missing | 64.37 | 10.34 | missing | 54.02 | 9.20 | missing | 50.57 | 0.00 | missing | 0.00 | 87 | 25.29 | 35.63 | 40.23 | 0.00 | missing |
| 133498 | 19 | 75713 | 6011126 | missing | 2019 | 205 | B | missing | * | missing | 13 | 2 | * | * | missing | * | * | * | * | * | * | * | missing | * | * | missing | * | * | missing | * | * | missing | * | * | * | * | * | * | missing |
| 133499 | 19 | 75713 | 6011126 | missing | 2019 | 206 | B | missing | 4 | missing | 13 | 2 | 4 | 4 | missing | * | * | * | * | * | 4 | * | missing | * | * | missing | * | * | missing | * | * | missing | * | 4 | * | * | * | * | missing |
| 133500 | 19 | 75713 | 6011126 | missing | 2019 | 207 | B | missing | 4 | missing | 13 | 2 | 4 | 4 | missing | * | * | * | * | * | 4 | * | missing | * | * | missing | * | * | missing | * | * | missing | * | 4 | * | * | * | * | missing |
| 133501 | 19 | 75713 | 6011126 | missing | 2019 | 222 | B | missing | 110 | missing | 13 | 2 | 111 | 110 | missing | 57.27 | 23.64 | 80.91 | 13.64 | 5.45 | 110 | 68.18 | missing | 8.18 | 53.64 | missing | 10.00 | 54.55 | missing | 6.36 | 0.00 | missing | 0.00 | 110 | 23.64 | 36.36 | 39.09 | 0.00 | missing |
| 133502 | 19 | 75713 | 6011126 | missing | 2019 | 223 | B | missing | 5 | missing | 13 | 2 | 5 | 5 | missing | * | * | * | * | * | 5 | * | missing | * | * | missing | * | * | missing | * | * | missing | * | 5 | * | * | * | * | missing |
| 133503 | 19 | 75713 | 6011126 | missing | 2019 | 224 | B | missing | 29 | missing | 13 | 2 | 30 | 29 | missing | 17.24 | 20.69 | 37.93 | 17.24 | 44.83 | 29 | 13.79 | missing | 51.72 | 20.69 | missing | 31.03 | 20.69 | missing | 37.93 | 0.00 | missing | 0.00 | 29 | 34.48 | 48.28 | 41.38 | 0.00 | missing |
| 133504 | 19 | 75713 | 6011126 | missing | 2019 | 226 | B | missing | * | missing | 13 | 2 | * | * | missing | * | * | * | * | * | * | * | missing | * | * | missing | * | * | missing | * | * | missing | * | * | * | * | * | * | missing |
| 133505 | 19 | 75713 | 6011126 | missing | 2019 | 227 | B | missing | * | missing | 13 | 2 | * | * | missing | * | * | * | * | * | * | * | missing | * | * | missing | * | * | missing | * | * | missing | * | * | * | * | * | * | missing |
Some columns got parsed as strings rather than numbers. We need to "tryparse" these as Float64, if that returns nothing, we'll replace with missing, if the
function floatormiss(x) if ismissing(x) return missing end let v = tryparse(Float64,x); if isnothing(v) missing else v end end end testscores.Mean_Scale_Score = floatormiss.(testscores.Mean_Scale_Score)
133505-element Vector{Union{Missing, Float64}}:
missing
missing
missing
missing
missing
missing
missing
missing
missing
missing
⋮
missing
missing
missing
missing
missing
missing
missing
missing
missing
pusdentities = @subset(entities,.! ismissing.(:District_Name) .&& :District_Name .== "Pasadena Unified" .&& .! ismissing.(:School_Name)) alhambraentities = @subset(entities,.! ismissing.(:District_Name) .&& :District_Name .== "Alhambra Unified" .&& .! ismissing.(:School_Name)) glendaleentities = @subset(entities,.! ismissing.(:District_Name) .&& :District_Name .== "Glendale Unified" .&& .! ismissing.(:School_Name)) pusdtests = leftjoin(@select(pusdentities,:School_Code,:School_Name,:District_Name),testscores,on = :School_Code, matchmissing=:notequal) alhambratests = leftjoin(@select(alhambraentities,:School_Code,:School_Name,:District_Name),testscores,on = :School_Code, matchmissing=:notequal) glendaletests = leftjoin(@select(glendaleentities,:School_Code,:School_Name,:District_Name),testscores,on = :School_Code, matchmissing=:notequal)
| Row | School_Code | School_Name | District_Name | County_Code | District_Code | Filler | Test_Year | Subgroup_ID | Test_Type | Total_CAASPP_Enrollment | Total_Tested_At_Entity_Level | Total_Tested_at_Subgroup_Level | Grade | Test_Id | CAASPP_Reported_Enrollment | Students_Tested | Mean_Scale_Score | Percentage_Standard_Exceeded | Percentage_Standard_Met | Percentage_Standard_Met_and_Above | Percentage_Standard_Nearly_Met | Percentage_Standard_Not_Met | Students_with_Scores | Area_1_Percentage_Above_Standard | Area_1_Percentage_At_or_Near_Standard | Area_1_Percentage_Below_Standard | Area_2_Percentage_Above_Standard | Area_2_Percentage_At_or_Near_Standard | Area_2_Percentage_Below_Standard | Area_3_Percentage_Above_Standard | Area_3_Percentage_At_or_Near_Standard | Area_3_Percentage_Below_Standard | Area_4_Percentage_Above_Standard | Area_4_Percentage_At_or_Near_Standard | Area_4_Percentage_Below_Standard | Total_Tested_with_Scores | Area_1_Percentage_Near_Standard | Area_2_Percentage_Near_Standard | Area_3_Percentage_Near_Standard | Area_4_Percentage_Near_Standard | County_Code_District_Code_School_Code_Filler_Test_Year_Student_Group_ID_Test_Type_Total_Tested_at_Reporting_Level_Total_Tested_with_Scores_at_Reporting_Level_Grade_Test_ID_Students_Enrolled_Students_Tested_Mean_Scale_Score_Percentage_Standard_Exceeded_Percentage_Standard_Met_Percentage_Standard_Met_and_Above_Percentage_Standard_Nearly_Met_Percentage_Standard_Not_Met_Students_with_Scores_Area_1_Percentage_Above_Standard_Area_1_Percentage_Near_Standard_Area_1_Percentage_Below_Standard_Area_2_Percentage_Above_Standard_Area_2_Percentage_Near_Standard_Area_2_Percentage_Below_Standard_Area_3_Percentage_Above_Standard_Area_3_Percentage_Near_Standard_Area_3_Percentage_Below_Standard_Area_4_Percentage_Above_Standard_Area_4_Percentage_Near_Standard_Area_4_Percentage_Below_Standard_Type_ID |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Int64 | String? | String? | Int64? | Int64? | Missing | Int64? | Int64? | String? | Int64? | Any | Int64? | Int64? | Int64? | Any | Any | Float64? | String? | String? | String? | String? | String? | Any | String? | String? | String? | String? | String? | String? | String? | String? | String? | String? | String? | String? | Any | String? | String? | String? | String? | Missing | |
| 1 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 1 | B | 1356 | 1318 | 1318 | 8 | 1 | 11 | 0 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 2 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 1 | B | 1356 | 1318 | 1318 | 8 | 2 | 11 | 0 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 3 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 1 | B | 1356 | 1318 | 1318 | 11 | 2 | 680 | 665 | 2656.3 | 33 | 29 | 62 | 21 | 17 | 664 | 45 | 34 | 20 | 37 | 48 | 15 | 39 | 50 | 10 | 0 | 0 | 0 | missing | missing | missing | missing | missing | missing |
| 4 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 1 | B | 1356 | 1318 | 1318 | 11 | 1 | 680 | 667 | 2672.2 | 52 | 33 | 84 | 12 | 4 | 666 | 56 | 37 | 7 | 61 | 32 | 6 | 30 | 63 | 6 | 62 | 34 | 4 | missing | missing | missing | missing | missing | missing |
| 5 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 1 | B | 1356 | 1318 | 1318 | 13 | 1 | 691 | 667 | missing | 52 | 33 | 84 | 12 | 4 | 666 | 56 | 37 | 7 | 61 | 32 | 6 | 30 | 63 | 6 | 62 | 34 | 4 | missing | missing | missing | missing | missing | missing |
| 6 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 1 | B | 1356 | 1318 | 1318 | 13 | 2 | 691 | 665 | missing | 33 | 29 | 62 | 21 | 17 | 664 | 45 | 34 | 20 | 37 | 48 | 15 | 39 | 50 | 10 | 0 | 0 | 0 | missing | missing | missing | missing | missing | missing |
| 7 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 3 | B | 1356 | 1318 | 657 | 8 | 1 | 11 | 0 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 8 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 3 | B | 1356 | 1318 | 657 | 8 | 2 | 11 | 0 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| 9 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 3 | B | 1356 | 1318 | 657 | 11 | 2 | 680 | 332 | 2655.1 | 36 | 25 | 61 | 20 | 19 | 331 | 44 | 31 | 25 | 39 | 47 | 15 | 41 | 47 | 11 | 0 | 0 | 0 | missing | missing | missing | missing | missing | missing |
| 10 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 3 | B | 1356 | 1318 | 657 | 11 | 1 | 680 | 331 | 2658.5 | 45 | 35 | 80 | 14 | 6 | 331 | 52 | 40 | 9 | 53 | 38 | 9 | 29 | 63 | 8 | 55 | 39 | 6 | missing | missing | missing | missing | missing | missing |
| 11 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 3 | B | 1356 | 1318 | 657 | 13 | 1 | 691 | 331 | missing | 45 | 35 | 80 | 14 | 6 | 331 | 52 | 40 | 9 | 53 | 38 | 9 | 29 | 63 | 8 | 55 | 39 | 6 | missing | missing | missing | missing | missing | missing |
| 12 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 3 | B | 1356 | 1318 | 657 | 13 | 2 | 691 | 332 | missing | 36 | 25 | 61 | 20 | 19 | 331 | 44 | 31 | 25 | 39 | 47 | 15 | 41 | 47 | 11 | 0 | 0 | 0 | missing | missing | missing | missing | missing | missing |
| 13 | 1932144 | Crescenta Valley High | Glendale Unified | 19 | 64568 | missing | 2015 | 4 | B | 1356 | 1318 | 661 | 8 | 2 | 11 | 0 | missing | * | * | * | * | * | 0 | * | * | * | * | * | * | * | * | * | * | * | * | missing | missing | missing | missing | missing | missing |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 47359 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 203 | B | missing | 34 | missing | 13 | 2 | 34 | 34 | missing | 41.18 | 32.35 | 73.53 | 11.76 | 14.71 | 34 | 55.88 | missing | 23.53 | 41.18 | missing | 17.65 | 47.06 | missing | 8.82 | 0.00 | missing | 0.00 | 34 | 20.59 | 41.18 | 44.12 | 0.00 | missing |
| 47360 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 204 | B | missing | 146 | missing | 13 | 2 | 147 | 146 | missing | 13.70 | 15.75 | 29.45 | 22.60 | 47.95 | 146 | 18.49 | missing | 53.42 | 16.44 | missing | 48.63 | 17.12 | missing | 41.78 | 0.00 | missing | 0.00 | 146 | 28.08 | 34.93 | 41.10 | 0.00 | missing |
| 47361 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 205 | B | missing | * | missing | 13 | 2 | * | * | missing | * | * | * | * | * | * | * | missing | * | * | missing | * | * | missing | * | * | missing | * | * | * | * | * | * | missing |
| 47362 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 206 | B | missing | 559 | missing | 13 | 2 | 562 | 559 | missing | 21.65 | 19.14 | 40.79 | 24.33 | 34.88 | 559 | 29.16 | missing | 42.04 | 21.65 | missing | 35.42 | 22.18 | missing | 28.98 | 0.00 | missing | 0.00 | 559 | 28.80 | 42.93 | 48.84 | 0.00 | missing |
| 47363 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 207 | B | missing | 5 | missing | 13 | 2 | 5 | 5 | missing | * | * | * | * | * | 5 | * | missing | * | * | missing | * | * | missing | * | * | missing | * | 5 | * | * | * | * | missing |
| 47364 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 220 | B | missing | 5 | missing | 13 | 2 | 5 | 5 | missing | * | * | * | * | * | 5 | * | missing | * | * | missing | * | * | missing | * | * | missing | * | 5 | * | * | * | * | missing |
| 47365 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 222 | B | missing | 31 | missing | 13 | 2 | 31 | 31 | missing | 48.39 | 25.81 | 74.19 | 19.35 | 6.45 | 31 | 54.84 | missing | 9.68 | 51.61 | missing | 6.45 | 45.16 | missing | 6.45 | 0.00 | missing | 0.00 | 31 | 35.48 | 41.94 | 48.39 | 0.00 | missing |
| 47366 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 223 | B | missing | 45 | missing | 13 | 2 | 45 | 45 | missing | 48.89 | 26.67 | 75.56 | 15.56 | 8.89 | 45 | 53.33 | missing | 13.33 | 51.11 | missing | 13.33 | 46.67 | missing | 8.89 | 0.00 | missing | 0.00 | 45 | 33.33 | 35.56 | 44.44 | 0.00 | missing |
| 47367 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 224 | B | missing | 53 | missing | 13 | 2 | 53 | 53 | missing | 35.85 | 16.98 | 52.83 | 18.87 | 28.30 | 53 | 43.40 | missing | 32.08 | 32.08 | missing | 28.30 | 35.85 | missing | 33.96 | 0.00 | missing | 0.00 | 53 | 24.53 | 39.62 | 30.19 | 0.00 | missing |
| 47368 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 225 | B | missing | * | missing | 13 | 2 | * | * | missing | * | * | * | * | * | * | * | missing | * | * | missing | * | * | missing | * | * | missing | * | * | * | * | * | * | missing |
| 47369 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 226 | B | missing | 306 | missing | 13 | 2 | 309 | 306 | missing | 33.66 | 22.88 | 56.54 | 25.16 | 18.30 | 306 | 38.89 | missing | 27.12 | 31.70 | missing | 23.86 | 33.01 | missing | 17.32 | 0.00 | missing | 0.00 | 306 | 33.99 | 44.44 | 49.67 | 0.00 | missing |
| 47370 | 6061311 | Woodrow Wilson Middle | Glendale Unified | 19 | 64568 | missing | 2019 | 227 | B | missing | 20 | missing | 13 | 2 | 20 | 20 | missing | 40.00 | 25.00 | 65.00 | 25.00 | 10.00 | 20 | 55.00 | missing | 15.00 | 35.00 | missing | 10.00 | 30.00 | missing | 20.00 | 0.00 | missing | 0.00 | 20 | 30.00 | 55.00 | 50.00 | 0.00 | missing |
We want points for each test... TestID = 1 means english, 2 means Math (for "smarter balanced" assessment). Let's group the tests by cohort, meaning TestYear - (Grade - 3) basically the year they were in 3rd grade which is the first year you take the test.
function plotschools(entities,testscores) for (testid,testname) in Iterators.zip([1,2],["English","Math"]) let pl = [] for sch in eachrow(entities) ourdf = @subset(testscores,:School_Code .== sch.School_Code .&& :Subgroup_ID .== 1 .&& :Grade .< 13 .&& :Test_Id .== testid .&& .!ismissing.(:Mean_Scale_Score)) # @show ourdf if nrow(ourdf) > 0 p = @df ourdf plot(:Grade,:Mean_Scale_Score; xlim=(3,12),ylim = (2250,2750),group=:Test_Year .- (:Grade .- 3),legend=false,size=(250,250), title="$(testname): $(sch.School_Name)",linewidth=3) p = @df ourdf scatter!(:Grade,:Mean_Scale_Score; xlim=(3,12),ylim = (2250,2750),group=:Test_Year .- (:Grade .- 3),legend=false,size=(250,250), title="$(testname): $(sch.School_Name)",markerstrokewidth=0) push!(pl,p) end end display(plot(pl...; size=(1500,1500))) end end end plotschools(pusdentities,@subset(pusdtests,:Subgroup_ID .== 1)) plotschools(alhambraentities,@subset(alhambratests,:Subgroup_ID .== 1))
Clearly there are some differences between schools. Since we are looking at the average across the school some of this will be because the demographic and socioeconomic mix of the students is different. For example the middle schoolers at the relative wealthy community of Sierra Madre Middle School are testing about the same as the high school 11th graders at Blair and Marshall Fundamental.
Also Alhambra schools appear to have higher achievement in general.
A considerable difference in overall average test scores can be attributed to a different mix of students. So let's break down the schools by demographic groups. The DemographicID in the students table describes the StudentGroups. We can break this down by:
"Economically Disadvantaged" vs "Not economically disadvantaged"
Race and ethnicity
Parent Education Level
For now, let's focus on math scores, and we'll iterate over every school, and output a graph that shows the average curve for each parent education level:
function plotedlevel(schools,scores,dist) edlevnames = Dict(90 => "No HSD", 91 => "HSD", 92 => "Some College", 93 => "College Grad", 94=>"Grad School") for sch in eachrow(schools) ourdf = @subset(scores,:Test_Id .== 2 .&& :School_Code .== sch.School_Code .&& in.(:Subgroup_ID ,Ref(90:94))) p = @df ourdf scatter(:Grade,:Mean_Scale_Score; xlim=(2.5,12), ylim=(2300,2800),title="$(dist)\nMath $(sch.School_Name)\nBy Parent Ed",label=false,markersize=3,size=(500,500)) for edlev in 90:94 subs = @subset(ourdf,:Subgroup_ID .== edlev .&& .!ismissing.(:Grade) .&& .! ismissing.(:Mean_Scale_Score)) if nrow(subs) < 4 continue end println("There are $(nrow(subs)) observations for $(sch.School_Name)") grades = collect(minimum(subs.Grade):maximum(subs.Grade)) if length(grades) > 2 l = loess(Float64.(subs.Grade),Float64.(subs.Mean_Scale_Score)) p = plot!(grades,map(x -> Loess.predict(l,x), Float64.(grades)); label=edlevnames[edlev], linewidth=3) end end display(p) end end plotedlevel(pusdentities,pusdtests,"PUSD") plotedlevel(alhambraentities,alhambratests,"Alhambra") plotedlevel(glendaleentities,glendaletests,"Glendale")
There are 4 observations for CIS Academy There are 5 observations for CIS Academy There are 8 observations for Sierra Madre Middle There are 15 observations for Sierra Madre Middle There are 15 observations for Sierra Madre Middle There are 15 observations for Sierra Madre Middle There are 20 observations for Blair High There are 20 observations for Blair High There are 20 observations for Blair High There are 20 observations for Blair High There are 17 observations for Blair High There are 20 observations for Marshall Fundamental There are 20 observations for Marshall Fundamental There are 20 observations for Marshall Fundamental There are 20 observations for Marshall Fundamental There are 20 observations for Marshall Fundamental There are 13 observations for Norma Coombs Elementary There are 11 observations for Norma Coombs Elementary There are 9 observations for Norma Coombs Elementary There are 5 observations for Norma Coombs Elementary There are 5 observations for John Muir High There are 5 observations for John Muir High There are 5 observations for John Muir High There are 4 observations for John Muir High There are 5 observations for Rose City High (Continuation) There are 5 observations for Rose City High (Continuation) There are 5 observations for Pasadena High There are 5 observations for Pasadena High There are 5 observations for Pasadena High There are 5 observations for Pasadena High There are 5 observations for Pasadena High There are 6 observations for Altadena Elementary There are 11 observations for Altadena Elementary There are 5 observations for Don Benito Fundamental There are 15 observations for Don Benito Fundamental There are 15 observations for Don Benito Fundamental There are 15 observations for Don Benito Fundamental There are 4 observations for Field (Eugene) Elementary There are 14 observations for Field (Eugene) Elementary There are 14 observations for Field (Eugene) Elementary There are 11 observations for Hamilton Elementary There are 15 observations for Hamilton Elementary There are 15 observations for Hamilton Elementary There are 15 observations for Hamilton Elementary There are 13 observations for Jackson Elementary There are 14 observations for Jackson Elementary There are 5 observations for Jackson Elementary There are 10 observations for Longfellow (Henry W.) Elementary There are 14 observations for Longfellow (Henry W.) Elementary There are 11 observations for Longfellow (Henry W.) Elementary There are 9 observations for Longfellow (Henry W.) Elementary There are 15 observations for Madison Elementary There are 15 observations for Madison Elementary There are 4 observations for San Rafael Elementary There are 12 observations for San Rafael Elementary There are 15 observations for San Rafael Elementary There are 6 observations for Sierra Madre Elementary There are 13 observations for Sierra Madre Elementary There are 15 observations for Sierra Madre Elementary There are 15 observations for Sierra Madre Elementary There are 15 observations for Washington Middle There are 15 observations for Washington Middle There are 11 observations for Washington Middle There are 4 observations for Washington Middle There are 12 observations for Webster Elementary There are 13 observations for Webster Elementary There are 12 observations for Webster Elementary There are 9 observations for Webster Elementary There are 13 observations for Willard Elementary There are 15 observations for Willard Elementary There are 15 observations for Willard Elementary There are 15 observations for Willard Elementary There are 7 observations for Willard Elementary There are 15 observations for Charles W. Eliot Middle There are 15 observations for Charles W. Eliot Middle There are 15 observations for Charles W. Eliot Middle There are 14 observations for Charles W. Eliot Middle There are 5 observations for Charles W. Eliot Middle There are 15 observations for Washington Elementary There are 15 observations for Washington Elementary There are 27 observations for McKinley There are 30 observations for McKinley There are 30 observations for McKinley There are 30 observations for McKinley There are 29 observations for McKinley There are 5 observations for Alhambra High There are 5 observations for Alhambra High There are 5 observations for Alhambra High There are 5 observations for Alhambra High There are 5 observations for Alhambra High There are 5 observations for Mark Keppel High There are 5 observations for Mark Keppel High There are 5 observations for Mark Keppel High There are 5 observations for Mark Keppel High There are 5 observations for Mark Keppel High There are 5 observations for San Gabriel High There are 5 observations for San Gabriel High There are 5 observations for San Gabriel High There are 5 observations for San Gabriel High There are 5 observations for San Gabriel High There are 29 observations for Brightwood Elementary There are 30 observations for Brightwood Elementary There are 30 observations for Brightwood Elementary There are 30 observations for Brightwood Elementary There are 26 observations for Emery Park Elementary There are 29 observations for Emery Park Elementary There are 12 observations for Emery Park Elementary There are 29 observations for Fremont Elementary There are 28 observations for Fremont Elementary There are 14 observations for Fremont Elementary There are 27 observations for Garfield Elementary There are 30 observations for Garfield Elementary There are 27 observations for Garfield Elementary There are 8 observations for Garfield Elementary There are 27 observations for Granada Elementary There are 17 observations for Granada Elementary There are 26 observations for Granada Elementary There are 16 observations for Marguerita Elementary There are 30 observations for Marguerita Elementary There are 26 observations for Marguerita Elementary There are 22 observations for Marguerita Elementary There are 18 observations for Martha Baldwin Elementary There are 30 observations for Martha Baldwin Elementary There are 30 observations for Martha Baldwin Elementary There are 30 observations for Martha Baldwin Elementary There are 19 observations for Martha Baldwin Elementary There are 7 observations for Monterey Highlands Elementary There are 30 observations for Monterey Highlands Elementary There are 28 observations for Monterey Highlands Elementary There are 30 observations for Monterey Highlands Elementary There are 30 observations for Monterey Highlands Elementary There are 7 observations for Park Elementary There are 30 observations for Park Elementary There are 30 observations for Park Elementary There are 30 observations for Park Elementary There are 9 observations for Park Elementary There are 23 observations for Ramona Elementary There are 30 observations for Ramona Elementary There are 30 observations for Ramona Elementary There are 30 observations for Ramona Elementary There are 11 observations for Ramona Elementary There are 30 observations for Repetto Elementary There are 29 observations for Repetto Elementary There are 30 observations for Repetto Elementary There are 25 observations for Repetto Elementary There are 19 observations for William Northrup Elementary There are 30 observations for William Northrup Elementary There are 26 observations for William Northrup Elementary There are 26 observations for William Northrup Elementary There are 30 observations for Ynez Elementary There are 30 observations for Ynez Elementary There are 28 observations for Ynez Elementary There are 28 observations for Ynez Elementary There are 5 observations for Crescenta Valley High There are 5 observations for Crescenta Valley High There are 5 observations for Crescenta Valley High There are 5 observations for Crescenta Valley High There are 5 observations for Daily (Allan F.) High (Continuation) There are 5 observations for Daily (Allan F.) High (Continuation) There are 5 observations for Daily (Allan F.) High (Continuation) There are 5 observations for Glendale High There are 5 observations for Glendale High There are 5 observations for Glendale High There are 5 observations for Glendale High There are 5 observations for Glendale High There are 5 observations for Herbert Hoover High There are 5 observations for Herbert Hoover High There are 5 observations for Herbert Hoover High There are 5 observations for Herbert Hoover High There are 5 observations for Herbert Hoover High There are 5 observations for Anderson W. Clark Magnet High There are 5 observations for Anderson W. Clark Magnet High There are 5 observations for Anderson W. Clark Magnet High There are 5 observations for Anderson W. Clark Magnet High There are 20 observations for Balboa Elementary There are 19 observations for Balboa Elementary There are 20 observations for Balboa Elementary There are 17 observations for Balboa Elementary There are 20 observations for Cerritos Elementary There are 19 observations for Cerritos Elementary There are 9 observations for Cerritos Elementary There are 15 observations for Columbus Elementary There are 14 observations for Columbus Elementary There are 15 observations for Columbus Elementary There are 8 observations for Columbus Elementary There are 20 observations for Dunsmore Elementary There are 20 observations for Dunsmore Elementary There are 4 observations for Thomas Edison Elementary There are 20 observations for Thomas Edison Elementary There are 20 observations for Thomas Edison Elementary There are 20 observations for Thomas Edison Elementary There are 17 observations for Thomas Edison Elementary There are 10 observations for Benjamin Franklin Elementary There are 17 observations for Benjamin Franklin Elementary There are 14 observations for Benjamin Franklin Elementary There are 17 observations for John C. Fremont Elementary There are 20 observations for John C. Fremont Elementary There are 20 observations for John C. Fremont Elementary There are 12 observations for Glenoaks Elementary There are 20 observations for Glenoaks Elementary There are 20 observations for Glenoaks Elementary There are 20 observations for Thomas Jefferson Elementary There are 10 observations for Thomas Jefferson Elementary There are 20 observations for Thomas Jefferson Elementary There are 13 observations for Thomas Jefferson Elementary There are 15 observations for Mark Keppel Elementary There are 15 observations for Mark Keppel Elementary There are 15 observations for Mark Keppel Elementary There are 15 observations for Mark Keppel Elementary There are 5 observations for La Crescenta Elementary There are 20 observations for La Crescenta Elementary There are 20 observations for La Crescenta Elementary There are 11 observations for Abraham Lincoln Elementary There are 20 observations for Abraham Lincoln Elementary There are 20 observations for Abraham Lincoln Elementary There are 6 observations for Horace Mann Elementary There are 15 observations for Horace Mann Elementary There are 14 observations for Horace Mann Elementary There are 15 observations for Horace Mann Elementary There are 5 observations for Horace Mann Elementary There are 15 observations for John Marshall Elementary There are 8 observations for John Marshall Elementary There are 15 observations for John Marshall Elementary There are 10 observations for John Marshall Elementary There are 20 observations for Monte Vista Elementary There are 20 observations for Monte Vista Elementary There are 5 observations for Mountain Avenue Elementary There are 20 observations for Mountain Avenue Elementary There are 20 observations for Mountain Avenue Elementary There are 20 observations for John Muir Elementary There are 20 observations for John Muir Elementary There are 20 observations for John Muir Elementary There are 16 observations for John Muir Elementary There are 20 observations for Valley View Elementary There are 20 observations for Valley View Elementary There are 18 observations for Verdugo Woodlands Elementary There are 20 observations for Verdugo Woodlands Elementary There are 20 observations for Verdugo Woodlands Elementary There are 15 observations for R. D. White Elementary There are 14 observations for R. D. White Elementary There are 15 observations for R. D. White Elementary There are 15 observations for R. D. White Elementary There are 14 observations for Theodore Roosevelt Middle There are 15 observations for Theodore Roosevelt Middle There are 15 observations for Theodore Roosevelt Middle There are 15 observations for Theodore Roosevelt Middle There are 12 observations for Theodore Roosevelt Middle There are 10 observations for Eleanor J. Toll Middle There are 15 observations for Eleanor J. Toll Middle There are 15 observations for Eleanor J. Toll Middle There are 15 observations for Eleanor J. Toll Middle There are 15 observations for Eleanor J. Toll Middle There are 10 observations for Rosemont Middle There are 10 observations for Rosemont Middle There are 10 observations for Rosemont Middle There are 10 observations for Rosemont Middle There are 12 observations for Woodrow Wilson Middle There are 15 observations for Woodrow Wilson Middle There are 15 observations for Woodrow Wilson Middle There are 15 observations for Woodrow Wilson Middle There are 15 observations for Woodrow Wilson Middle
function plotdistrictedlev(tests,name) edlevnames = Dict(90 => "No HSD", 91 => "HSD", 92 => "Some College", 93 => "College Grad", 94=>"Grad School") p = @df tests scatter(:Grade,:Mean_Scale_Score; xlim=(2.5,12), ylim=(2300,2800), title="$(name)\nMath Scores") for edlev in keys(edlevnames) subs = @subset(tests, :Subgroup_ID .== edlev .&& .! ismissing.(:Grade) .&& .! ismissing.(:Mean_Scale_Score)) grades = collect(3:11) l = loess(Float64.(subs.Grade), Float64.(subs.Mean_Scale_Score)) p = plot!(grades,map(x -> Loess.predict(l,x), Float64.(grades)); label = edlevnames[edlev],linewidth=3) end display(p) p end p1 = plotdistrictedlev(@subset(pusdtests,:Test_Id .== 2),"PUSD") p2 = plotdistrictedlev(@subset(alhambratests,:Test_Id .== 2),"Alhambra") p3 = plotdistrictedlev(@subset(glendaletests,:Test_Id .== 2),"Glendale") plot(p1,p2,p3; layout=(1,3),legend=:topleft,size=(2000,500))
One reason to create a mathematical model is that it can simplify your understanding of a problem. For example a drag coefficient is a kind of summary of the entire pressure field on the surface of an object... integrated together it results in a certain amount of drag.
In our case we will assume that variation from year to year is low, so that we can pool the years together, and get a summary of how quickly students are learning math, by fitting a line through different groups of grades: (3,4,5) (6,7,8) and (8,11) we can summarize the general trend in each school as just 6 numbers (levels and slopes of each group), rather than the details of potentially hundreds of measurements.
We will assume the level and slope of these three lines depends on parents education level only, since we don't have individual student data with covariates where we could fit a more complicated model.
@model function schoolmod(school,parented,grade,meanscore,nschools,nedlev) c4avg ~ Normal(2450.0,200.0) c7avg ~ Normal(2500.0,200.0) c11avg ~ Normal(2550.0,200.0) m4avg ~ Normal(0.0,100.0) m7avg ~ Normal(0.0,100.0) msd ~ Gamma(10.0,20.0/9.0) csd ~ Gamma(10.0,20.0/9.0) c4 ~ MvNormal(repeat([0.0],nschools),csd^2*I(nschools)) c7 ~ MvNormal(repeat([0.0],nschools),csd^2*I(nschools)) c11 ~ MvNormal(repeat([0.0],nschools),csd^2*I(nschools)) m4 ~ MvNormal(repeat([0.0],nschools),msd^2*I(nschools)) m7 ~ MvNormal(repeat([0.0],nschools),msd^2*I(nschools)) edlevmult ~ filldist(Gamma(20.0,1.0/19),nschools) cedlev4 ~ MvNormal(repeat([0.0],nedlev),Diagonal([50.0,50.0,50.0,50.0,50.0].^2)) cedlev7 ~ MvNormal(repeat([0.0],nedlev),Diagonal([50.0,50.0,50.0,50.0,50.0].^2)) cedlev11 ~ MvNormal(repeat([0.0],nedlev),Diagonal([50.0,50.0,50.0,50.0,50.0].^2)) medlev4 ~ MvNormal(repeat([0.0],nedlev),Diagonal([20.0,20.0,20.0,20.0,20.0].^2)) medlev7 ~ MvNormal(repeat([0.0],nedlev),Diagonal([20.0,20.0,20.0,20.0,20.0].^2)) s ~ Gamma(10.0,50/10.0) preds = zeros(eltype(s),length(meanscore)) for (i,(s,ed,g,sc)) in Iterators.enumerate(Iterators.zip(school,parented,grade,meanscore)) if ed == 1 # base case med4 = med7 = ced4 = ced7 = ced11 = zero(eltype(cedlev4)) else ced4 = cedlev4[ed] * edlevmult[s] ced7 = cedlev7[ed] * edlevmult[s] ced11 = cedlev11[ed] * edlevmult[s] med4 = medlev4[ed] * edlevmult[s] med7 = medlev7[ed] * edlevmult[s] end if g in (3,4,5) preds[i] = (c4avg + c4[s] + ced4) + (m4avg + m4[s] + med4) * (g-4) elseif g in (6,7,8) preds[i] = (c7avg + c7[s] + ced7) + (m7avg + m7[s] + med7) * (g-7) elseif g == 11 preds[i] = (c11avg + c11[s] + ced11) end end meanscore ~ MvNormal(preds,s^2 * I(length(preds))) end edlevtests = @chain begin @subset(testscores,.!ismissing.(:Mean_Scale_Score) .&& :Test_Id .== 2 .&& :School_Code .!= 0 .&& in.(:Subgroup_ID,Ref((90,91,92,93,94)))) @transform(:Edlev = :Subgroup_ID .- 89) @transform(:Score = Float64.(:Mean_Scale_Score)) end schoolids = map(Pair,edlevtests.District_Code,edlevtests.School_Code) uniqueids = unique(schoolids) schoolnames = Dict() schooldists = Dict() schoolgrades = Dict() for (i,k) in Iterators.enumerate(uniqueids) schoolnames[i] = "Unknown" schoolnames[k] = "Unknown" for g in 1:11 schoolgrades[k => g] = false end schooldists[k] = "Unknown" schooldists[i] = "Unknown" end for r in eachrow(entities) schoolnames[r.District_Code => r.School_Code] = r.School_Name schooldists[r.District_Code => r.School_Code] = r.District_Name for grade in 1:11 schoolgrades[(r.District_Code => r.School_Code) => grade ] = false end end for i in 1:length(uniqueids) schooldists[i] = schooldists[uniqueids[i]] end for r in eachrow(testscores) schoolgrades[(r.District_Code => r.School_Code) => r.Grade ] = true end schoolDict = Dict() merge!(schoolDict,Dict(map(Pair,uniqueids,1:length(uniqueids)))) merge!(schoolDict,Dict(map(Pair,1:length(uniqueids),uniqueids))) for i in 1:length(uniqueids) schoolnames[i] = haskey(schoolnames,schoolDict[i]) ? schoolnames[schoolDict[i]] : "Unknown"; end smod = schoolmod(map(x -> schoolDict[x],schoolids),edlevtests.Edlev,edlevtests.Grade,edlevtests.Score,length(uniqueids),5) Turing.setadbackend(:reversediff) Turing.setrdcache(true) if ispath("cache/mcmcchain.dat") ch = deserialize("cache/mcmcchain.dat") else ch = sample(smod,NUTS(900,.8),MCMCThreads(),500,2) end
Chains MCMC chain (500×519×2 Array{Float64, 3}):
Iterations = 901:1:1400
Number of chains = 2
Samples per chain = 500
Wall duration = 1762.8 seconds
Compute duration = 3408.96 seconds
parameters = c4avg, c7avg, c11avg, m4avg, m7avg, msd, csd, c4[1], c4
[2], c4[3], c4[4], c4[5], c4[6], c4[7], c4[8], c4[9], c4[10], c4[11], c4[12
], c4[13], c4[14], c4[15], c4[16], c4[17], c4[18], c4[19], c4[20], c4[21],
c4[22], c4[23], c4[24], c4[25], c4[26], c4[27], c4[28], c4[29], c4[30], c4[
31], c4[32], c4[33], c4[34], c4[35], c4[36], c4[37], c4[38], c4[39], c4[40]
, c4[41], c4[42], c4[43], c4[44], c4[45], c4[46], c4[47], c4[48], c4[49], c
4[50], c4[51], c4[52], c4[53], c4[54], c4[55], c4[56], c4[57], c4[58], c4[5
9], c4[60], c4[61], c4[62], c4[63], c4[64], c4[65], c4[66], c4[67], c4[68],
c4[69], c4[70], c4[71], c4[72], c4[73], c4[74], c4[75], c4[76], c4[77], c4
[78], c4[79], c7[1], c7[2], c7[3], c7[4], c7[5], c7[6], c7[7], c7[8], c7[9]
, c7[10], c7[11], c7[12], c7[13], c7[14], c7[15], c7[16], c7[17], c7[18], c
7[19], c7[20], c7[21], c7[22], c7[23], c7[24], c7[25], c7[26], c7[27], c7[2
8], c7[29], c7[30], c7[31], c7[32], c7[33], c7[34], c7[35], c7[36], c7[37],
c7[38], c7[39], c7[40], c7[41], c7[42], c7[43], c7[44], c7[45], c7[46], c7
[47], c7[48], c7[49], c7[50], c7[51], c7[52], c7[53], c7[54], c7[55], c7[56
], c7[57], c7[58], c7[59], c7[60], c7[61], c7[62], c7[63], c7[64], c7[65],
c7[66], c7[67], c7[68], c7[69], c7[70], c7[71], c7[72], c7[73], c7[74], c7[
75], c7[76], c7[77], c7[78], c7[79], c11[1], c11[2], c11[3], c11[4], c11[5]
, c11[6], c11[7], c11[8], c11[9], c11[10], c11[11], c11[12], c11[13], c11[1
4], c11[15], c11[16], c11[17], c11[18], c11[19], c11[20], c11[21], c11[22],
c11[23], c11[24], c11[25], c11[26], c11[27], c11[28], c11[29], c11[30], c1
1[31], c11[32], c11[33], c11[34], c11[35], c11[36], c11[37], c11[38], c11[3
9], c11[40], c11[41], c11[42], c11[43], c11[44], c11[45], c11[46], c11[47],
c11[48], c11[49], c11[50], c11[51], c11[52], c11[53], c11[54], c11[55], c1
1[56], c11[57], c11[58], c11[59], c11[60], c11[61], c11[62], c11[63], c11[6
4], c11[65], c11[66], c11[67], c11[68], c11[69], c11[70], c11[71], c11[72],
c11[73], c11[74], c11[75], c11[76], c11[77], c11[78], c11[79], m4[1], m4[2
], m4[3], m4[4], m4[5], m4[6], m4[7], m4[8], m4[9], m4[10], m4[11], m4[12],
m4[13], m4[14], m4[15], m4[16], m4[17], m4[18], m4[19], m4[20], m4[21], m4
[22], m4[23], m4[24], m4[25], m4[26], m4[27], m4[28], m4[29], m4[30], m4[31
], m4[32], m4[33], m4[34], m4[35], m4[36], m4[37], m4[38], m4[39], m4[40],
m4[41], m4[42], m4[43], m4[44], m4[45], m4[46], m4[47], m4[48], m4[49], m4[
50], m4[51], m4[52], m4[53], m4[54], m4[55], m4[56], m4[57], m4[58], m4[59]
, m4[60], m4[61], m4[62], m4[63], m4[64], m4[65], m4[66], m4[67], m4[68], m
4[69], m4[70], m4[71], m4[72], m4[73], m4[74], m4[75], m4[76], m4[77], m4[7
8], m4[79], m7[1], m7[2], m7[3], m7[4], m7[5], m7[6], m7[7], m7[8], m7[9],
m7[10], m7[11], m7[12], m7[13], m7[14], m7[15], m7[16], m7[17], m7[18], m7[
19], m7[20], m7[21], m7[22], m7[23], m7[24], m7[25], m7[26], m7[27], m7[28]
, m7[29], m7[30], m7[31], m7[32], m7[33], m7[34], m7[35], m7[36], m7[37], m
7[38], m7[39], m7[40], m7[41], m7[42], m7[43], m7[44], m7[45], m7[46], m7[4
7], m7[48], m7[49], m7[50], m7[51], m7[52], m7[53], m7[54], m7[55], m7[56],
m7[57], m7[58], m7[59], m7[60], m7[61], m7[62], m7[63], m7[64], m7[65], m7
[66], m7[67], m7[68], m7[69], m7[70], m7[71], m7[72], m7[73], m7[74], m7[75
], m7[76], m7[77], m7[78], m7[79], edlevmult[1], edlevmult[2], edlevmult[3]
, edlevmult[4], edlevmult[5], edlevmult[6], edlevmult[7], edlevmult[8], edl
evmult[9], edlevmult[10], edlevmult[11], edlevmult[12], edlevmult[13], edle
vmult[14], edlevmult[15], edlevmult[16], edlevmult[17], edlevmult[18], edle
vmult[19], edlevmult[20], edlevmult[21], edlevmult[22], edlevmult[23], edle
vmult[24], edlevmult[25], edlevmult[26], edlevmult[27], edlevmult[28], edle
vmult[29], edlevmult[30], edlevmult[31], edlevmult[32], edlevmult[33], edle
vmult[34], edlevmult[35], edlevmult[36], edlevmult[37], edlevmult[38], edle
vmult[39], edlevmult[40], edlevmult[41], edlevmult[42], edlevmult[43], edle
vmult[44], edlevmult[45], edlevmult[46], edlevmult[47], edlevmult[48], edle
vmult[49], edlevmult[50], edlevmult[51], edlevmult[52], edlevmult[53], edle
vmult[54], edlevmult[55], edlevmult[56], edlevmult[57], edlevmult[58], edle
vmult[59], edlevmult[60], edlevmult[61], edlevmult[62], edlevmult[63], edle
vmult[64], edlevmult[65], edlevmult[66], edlevmult[67], edlevmult[68], edle
vmult[69], edlevmult[70], edlevmult[71], edlevmult[72], edlevmult[73], edle
vmult[74], edlevmult[75], edlevmult[76], edlevmult[77], edlevmult[78], edle
vmult[79], cedlev4[1], cedlev4[2], cedlev4[3], cedlev4[4], cedlev4[5], cedl
ev7[1], cedlev7[2], cedlev7[3], cedlev7[4], cedlev7[5], cedlev11[1], cedlev
11[2], cedlev11[3], cedlev11[4], cedlev11[5], medlev4[1], medlev4[2], medle
v4[3], medlev4[4], medlev4[5], medlev7[1], medlev7[2], medlev7[3], medlev7[
4], medlev7[5], s
internals = lp, n_steps, is_accept, acceptance_rate, log_density, h
amiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error,
tree_depth, numerical_error, step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rh
at ⋯
Symbol Float64 Float64 Float64 Float64 Float64 Float
64 ⋯
c4avg 2440.4255 5.8882 0.1862 0.6243 71.1144 1.03
24 ⋯
c7avg 2513.8311 6.3879 0.2020 0.5647 119.5444 1.00
41 ⋯
c11avg 2514.1965 9.5378 0.3016 0.8242 114.0035 1.00
54 ⋯
m4avg 29.7964 2.5963 0.0821 0.1507 346.9391 1.00
55 ⋯
m7avg 21.9920 2.6512 0.0838 0.1394 344.6919 1.00
06 ⋯
msd 10.0978 0.9613 0.0304 0.0464 346.3260 1.00
38 ⋯
csd 40.0965 2.6541 0.0839 0.1197 551.0185 1.00
18 ⋯
c4[1] 1.0475 40.4098 1.2779 1.0450 1591.2607 0.99
95 ⋯
c4[2] 0.9106 41.3182 1.3066 1.2316 1198.0163 1.00
08 ⋯
c4[3] 0.8545 41.0887 1.2993 0.9990 2250.3768 1.00
00 ⋯
c4[4] 0.5061 40.6234 1.2846 0.7586 2965.5892 1.00
13 ⋯
c4[5] -1.0709 38.3358 1.2123 0.9495 1664.0303 1.00
07 ⋯
c4[6] 12.2531 6.7666 0.2140 0.6051 93.6259 1.03
40 ⋯
c4[7] 4.1737 7.3038 0.2310 0.6110 126.5082 1.01
20 ⋯
c4[8] -10.9930 7.3825 0.2335 0.5977 125.2130 1.01
83 ⋯
c4[9] 10.5860 11.1110 0.3514 0.5759 303.9863 1.00
51 ⋯
c4[10] -9.3139 6.8594 0.2169 0.5941 108.4339 1.01
93 ⋯
⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮
⋱
1 column and 490 rows om
itted
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
c4avg 2429.3568 2436.1968 2440.1172 2444.4851 2452.6015
c7avg 2502.0066 2509.3511 2513.5854 2518.2346 2526.6647
c11avg 2494.7246 2508.0144 2514.3996 2520.6381 2532.9437
m4avg 24.7613 28.1452 29.7375 31.4657 34.9334
m7avg 16.7809 20.2899 21.9509 23.8360 27.0437
msd 8.3538 9.4444 10.0167 10.7269 12.1284
csd 35.3150 38.2139 39.9481 41.8059 45.5391
c4[1] -75.4480 -26.9471 1.1705 28.8115 78.9201
c4[2] -74.9908 -26.8124 0.0271 29.1100 81.6853
c4[3] -84.6190 -27.1160 1.6508 29.3042 77.5984
c4[4] -78.3777 -27.2266 0.2631 28.5506 77.4234
c4[5] -82.3857 -25.0379 -1.7541 24.0425 73.8144
c4[6] -1.0539 7.7063 12.5619 16.9552 25.5576
c4[7] -10.5137 -0.6167 4.5432 9.2025 18.1292
c4[8] -25.8019 -15.9027 -10.6718 -5.9972 2.7659
c4[9] -11.1233 3.2125 11.0321 18.5573 31.4475
c4[10] -22.7640 -14.1670 -8.9317 -4.5674 3.7168
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
490 rows omitted
Now that we have fit our model, we have hundreds of samples of hundreds of parameters. Let's take a look at their distribution
for sch in 1:length(uniqueids) dist = schooldists[sch] p = plot(; linewidth=3, title = "$dist : $(schoolnames[sch])", xlim=(2.5,12),ylim=(2300,2700),legend=false) c4avg = mean(ch[:,Symbol("c4avg"),:]) m4avg = mean(ch[:,Symbol("m4avg"),:]) c7avg = mean(ch[:,Symbol("c7avg"),:]) m7avg = mean(ch[:,Symbol("m7avg"),:]) c11avg = mean(ch[:,Symbol("c11avg"),:]) edlevcolors = colorschemes[:tol_light] # a RG colorblind friendly pallette has4 = schoolgrades[schoolDict[sch] => 4] has7 = schoolgrades[schoolDict[sch] => 7] has11 = schoolgrades[schoolDict[sch] => 11] for ed in 1:5 c4 = mean(ch[:,Symbol("c4[$sch]"),:] + ch[:,Symbol("cedlev4[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + c4avg m4 = mean(ch[:,Symbol("m4[$sch]"),:] + ch[:,Symbol("medlev4[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + m4avg c7 = mean(ch[:,Symbol("c7[$sch]"),:] + ch[:,Symbol("cedlev7[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + c7avg m7 = mean(ch[:,Symbol("m7[$sch]"),:] + ch[:,Symbol("medlev7[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + m7avg c11 = mean(ch[:,Symbol("c11[$sch]"),:] + ch[:,Symbol("cedlev11[$ed]"),:] .* ch[:,Symbol("edlevmult[$sch]"),:]) + c11avg al = has4 ? 1.0 : .1 p = plot!([3,5], [c4-m4,c4+m4]; linewidth=6, color=edlevcolors[ed], xlim=(2.5,12),ylim=(2300,2800),legend=false, alpha=al) al = has7 ? 1.0 : .1 p = plot!([6,8], [c7-m7,c7+m7]; linewidth=6, color=edlevcolors[ed],alpha=al) al = has11 ? 1.0 : .1 p = scatter!([11], [c11]; markersize=6,alpha=al,color=edlevcolors[ed]) for i in 1:10 edmult = ch[i,Symbol("edlevmult[$sch]"),1] j = rand(1:length(ch)) c = ch[j,Symbol("c4[$sch]"),1] + ch[j,Symbol("cedlev4[$ed]"),1] * edmult + c4avg m = ch[j,Symbol("m4[$sch]"),1] + ch[j,Symbol("medlev4[$ed]"),1] * edmult + m4avg p = plot!([3,5],[c - m, c + m],label=false,alpha=.2,color=edlevcolors[ed]) c = ch[j,Symbol("c7[$sch]"),1] + ch[j,Symbol("cedlev7[$ed]"),1] * edmult + c7avg m = ch[j,Symbol("m7[$sch]"),1] + ch[j,Symbol("medlev7[$ed]"),1] * edmult + m7avg p = plot!([6,8],[c - m, c + m],label=false,alpha=.2,color=edlevcolors[ed]) c11 = ch[j,Symbol("c11[$sch]"),1] + ch[j,Symbol("cedlev11[$ed]"),1] * edmult + c11avg p = scatter!([randn()*.2 + 11],[c11],label=false,alpha=.2,color=edlevcolors[ed]) end end display(p) end